Web Scraping
# Download daily price data from Yahoo Finance with getSymbols().
# Focus is on 27 companies that were listed in the Dow for 2+ years.
companies <- c(
  "MMM", "AXP", "AAPL", "BA", "CAT", "CVX",
  "CSCO", "KO", "XOM", "GS", "HD",
  "IBM", "INTC", "JNJ", "JPM", "MCD", "MRK",
  "MSFT", "NKE", "PG", "TRV", "UNH",
  "VZ", "V", "WMT", "WBA", "DIS"
)
# NOTE(review): this placeholder is not read by any code visible here; it is
# kept unchanged in case later code depends on it.
companies.df.list <- rep(NA, length(companies))

# Fetch one price series per ticker and bind it to a variable named
# data<TICKER> (e.g. dataAAPL); later code refers to these variables by name.
for (i in seq_along(companies)) {
  assign(
    paste0("data", companies[i]),
    getSymbols(
      companies[i],
      auto.assign = FALSE, # return the series instead of assigning it
      from = "2019-07-01",
      to = "2020-06-30"
    )
  )
}
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
# datasets are labeled as 'data[STOCK]' e.g. dataAAPL
# Gather the per-ticker objects (data<TICKER>) into a single unnamed list,
# ordered exactly like `companies`.
companies.df <- unname(
  mget(paste0("data", companies), inherits = TRUE)
)
Data Cleaning
# Build a matrix of closing prices: one row per stock (27 tickers) and one
# column per trading day (the original notes give 252 trading days).
# The number of columns is taken from the MMM series.
companies.closings <- matrix(
  data = NA_real_,
  nrow = length(companies),
  ncol = length(dataMMM$MMM.Close)
)
for (i in seq_along(companies.df)) {
  # closing prices are in the 4th column of each downloaded series
  closings_i <- as.numeric(companies.df[[i]][, 4])
  # Fail loudly on a series of a different length: a shorter series whose
  # length divides the column count would otherwise be recycled silently.
  stopifnot(length(closings_i) == ncol(companies.closings))
  companies.closings[i, ] <- closings_i
}
# label the rows with the ticker symbols
rownames(companies.closings) <- companies
# Transpose: each row is a trading day holding the 27 stocks' closing prices,
# and each column is a stock.
companies.closings.t <- t(companies.closings)
# 1-based trading-day index (seq_len() is safe even for zero rows)
day <- seq_len(nrow(companies.closings.t))
df <- as.data.frame(cbind(day, companies.closings.t))
# Install plotly only when it is missing (an unconditional install.packages()
# re-downloads on every run and warns when the package is already loaded),
# then attach it. plotly also provides the %>% pipe used for the plots.
if (!requireNamespace("plotly", quietly = TRUE)) {
  install.packages("plotly")
}
library(plotly)
Asset 1: Stock Trends
# Line chart of every stock's closing price against the trading-day index.
# The first ticker (MMM) seeds the plot; the others are added one trace each.
asset1 <- plot_ly(
  data = df, x = ~day, y = ~MMM, name = "MMM",
  type = "scatter", mode = "lines",
  line = list(color = "rgb(1, 1, 1)")
)
for (ticker in companies[-1]) {
  # Select the column by ticker name. df's first column is `day`, so indexing
  # df by the ticker's position in `companies` picks the previous stock's
  # prices and never reaches the last ticker.
  # inherit = FALSE keeps these traces from picking up MMM's near-black line
  # colour, so plotly assigns each ticker its own default colour.
  asset1 <- asset1 %>%
    add_trace(
      x = df$day, y = df[[ticker]], name = ticker,
      type = "scatter", mode = "lines",
      inherit = FALSE
    )
}
# Dashed vertical markers at the start of each quarter of the sample window.
# x is repeated so both end points of each segment have an explicit x value.
asset1 <- asset1 %>%
  add_trace(
    x = rep(0, 2), y = c(0, 400), name = "Quarter 1",
    line = list(color = "rgb(100, 100, 100)", dash = "dash")
  ) %>%
  add_trace(
    x = rep(nrow(df) / 4, 2), y = c(0, 400), name = "Quarter 2",
    line = list(color = "rgb(100, 100, 100)", dash = "dash")
  ) %>%
  add_trace(
    x = rep(2 * nrow(df) / 4, 2), y = c(0, 400), name = "Quarter 3",
    line = list(color = "rgb(100, 100, 100)", dash = "dash")
  ) %>%
  add_trace(
    x = rep(3 * nrow(df) / 4, 2), y = c(0, 400), name = "Quarter 4",
    line = list(color = "rgb(100, 100, 100)", dash = "dash")
  ) %>%
  layout(
    title = "Closing Stock Prices of Dow Jones (2019 - Q3 to 2020 - Q2)",
    xaxis = list(title = "Day", zeroline = TRUE),
    yaxis = list(title = "Closing Price ($)")
  )
asset1
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
Asset 2: PCA Biplots
# Half-year cut-off: number of trading days in each half of the sample.
# Integer division keeps the cut-off a whole number when the row count is odd.
half <- nrow(companies.closings.t) %/% 2
# PCA on the first and second half of the trading days, with every stock's
# prices centred and scaled to unit variance. The argument is spelled
# `scale.` in full; `scale` only works through partial argument matching.
pca_2019 <- prcomp(
  companies.closings.t[seq_len(half), ],
  center = TRUE, scale. = TRUE
)
pca_2020 <- prcomp(
  companies.closings.t[(half + 1):(2 * half), ],
  center = TRUE, scale. = TRUE
)
# draw the two biplots stacked in one figure
par(mfrow = c(2, 1))
biplot(pca_2019)
biplot(pca_2020)

# back to a single-panel layout for any later plots
par(mfrow = c(1, 1))